{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Q-Learning"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+---------+\n",
      "|R: | : :G|\n",
      "| : : : : |\n",
      "| : : : : |\n",
      "| | :\u001b[43m \u001b[0m| : |\n",
      "|\u001b[34;1mY\u001b[0m| : |\u001b[35mB\u001b[0m: |\n",
      "+---------+\n",
      "\n",
      "351 -10 False\n",
      "+---------+\n",
      "|R: | : :G|\n",
      "| : : : : |\n",
      "| : : : : |\n",
      "| | :\u001b[43m \u001b[0m| : |\n",
      "|\u001b[34;1mY\u001b[0m| : |\u001b[35mB\u001b[0m: |\n",
      "+---------+\n",
      "  (Dropoff)\n",
      "331 -1 False\n",
      "+---------+\n",
      "|R: | : :G|\n",
      "| : : : : |\n",
      "| : : : : |\n",
      "| |\u001b[43m \u001b[0m: | : |\n",
      "|\u001b[34;1mY\u001b[0m| : |\u001b[35mB\u001b[0m: |\n",
      "+---------+\n",
      "  (West)\n",
      "331 -10 False\n",
      "+---------+\n",
      "|R: | : :G|\n",
      "| : : : : |\n",
      "| : : : : |\n",
      "| |\u001b[43m \u001b[0m: | : |\n",
      "|\u001b[34;1mY\u001b[0m| : |\u001b[35mB\u001b[0m: |\n",
      "+---------+\n",
      "  (Pickup)\n",
      "231 -1 False\n",
      "+---------+\n",
      "|R: | : :G|\n",
      "| : : : : |\n",
      "| :\u001b[43m \u001b[0m: : : |\n",
      "| | : | : |\n",
      "|\u001b[34;1mY\u001b[0m| : |\u001b[35mB\u001b[0m: |\n",
      "+---------+\n",
      "  (North)\n",
      "231 -10 False\n"
     ]
    }
   ],
   "source": [
    "import gym\n",
    "import numpy as np\n",
    "import random\n",
    "from collections import deque\n",
    "from matplotlib import pyplot as plt\n",
    "\n",
    "env = gym.make('Taxi-v2')\n",
    "env.reset()\n",
    "\n",
    "for t in range(5):\n",
    "    env.render()\n",
    "    action = env.action_space.sample()\n",
    "    observation, reward, done, info = env.step(action)\n",
    "    \n",
    "    # state which taxi is in is literally represented by a number.\n",
    "    print(observation, reward, done)\n",
    "\n",
    "    if done:\n",
    "        print(\"Episode finished after {} timesteps\".format(t+1))\n",
    "        break\n",
    "env.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {},
   "outputs": [],
   "source": [
    "# CONSTANTS\n",
    "\n",
    "ALPHA = 0.05\n",
    "ALPHA_START = 0.1\n",
    "ALPHA_TAPER = 0.01\n",
    "GAMMA = 0.95\n",
    "\n",
    "EPSILON_START = 1\n",
    "EPSILON_TAPER = 0.01\n",
    "\n",
    "NUM_EPISODES = 500\n",
    "MAX_STEPS = env.spec.timestep_limit\n",
    "\n",
    "# Logging\n",
    "reward_history = deque(maxlen=1000)\n",
    "\n",
    "# Initialise Q table\n",
    "\n",
    "available_state_size = env.observation_space.n\n",
    "available_action_size = env.action_space.n\n",
    "\n",
    "Q = np.zeros((available_state_size, available_action_size))\n",
    "\n",
    "update_counts = np.zeros((available_state_size, available_action_size), dtype=np.dtype(int))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "metadata": {},
   "outputs": [],
   "source": [
    "def greed_epsilon(Q, state, eps):\n",
    "    if ( random.random() < (1-eps)):\n",
    "        # index of largest action no the value of the largest action\n",
    "        return np.argmax(Q[state])\n",
    "    else:\n",
    "        return env.action_space.sample()\n",
    "\n",
    "def update_q_table(state, action, reward, next_state, Q):\n",
    "    alpha = ALPHA_START / (1.0 + update_counts[state][action] * ALPHA_TAPER)\n",
    "    update_counts[state][action] += 1\n",
    "    \n",
    "    current_Q = Q[state, action]\n",
    "    max_a_Q = Q[next_state, np.argmax((Q[next_state]))]\n",
    "\n",
    "    Q[state, action] = current_Q + alpha * ( reward +  GAMMA * max_a_Q - current_Q)\n",
    "    \n",
    "    return Q"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epi:  0 \tmoving average reward:  nan \tEps:  1.0\n",
      "epi:  1000 \tmoving average reward:  -165.902 \tEps:  0.09090909090909091\n",
      "epi:  2000 \tmoving average reward:  0.49 \tEps:  0.047619047619047616\n",
      "epi:  3000 \tmoving average reward:  5.966 \tEps:  0.03225806451612903\n",
      "epi:  4000 \tmoving average reward:  7.111 \tEps:  0.024390243902439025\n"
     ]
    }
   ],
   "source": [
    "reward_log = []\n",
    "\n",
    "for episode in range(5000):\n",
    "    total_reward = 0\n",
    "\n",
    "    eps = EPSILON_START / (1.0 + episode * EPSILON_TAPER)\n",
    "    \n",
    "    if (episode % 1000 == 0):\n",
    "        print(\"epi: \" ,episode, \"\\tmoving average reward: \",np.mean(reward_history) , \"\\tEps: \", eps)\n",
    "\n",
    "    state = env.reset()\n",
    "    \n",
    "    while t in range(MAX_STEPS):\n",
    "\n",
    "        action = greed_epsilon(Q, state, eps)\n",
    "        next_state, reward, done, info = env.step(int(action))\n",
    "        Q = update_q_table(state, int(action), reward, next_state, Q)\n",
    "        \n",
    "        state = next_state\n",
    "        \n",
    "        total_reward += reward\n",
    "\n",
    "        \n",
    "        if done:\n",
    "            break\n",
    "    \n",
    "    reward_history.append(total_reward)\n",
    "    reward_log.append(np.mean(reward_history))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[<matplotlib.lines.Line2D at 0x1208b6ac8>]"
      ]
     },
     "execution_count": 106,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAD8CAYAAAB6paOMAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3Xl0XPV99/H3V6PFsrzvq/CCWWxwwBE2zkIhNdhQihvapiYLzupAIE/TpuVAOVmaHJ6kSfMkTUJCnOAGEooxZEFJTRw7IUsbvMjGBhm8CNlGkjfZsrxqHX2fP+bKHoxkyZ6R7mju53XOMHd+9869358Yz2fubu6OiIhEV07YBYiISLgUBCIiEacgEBGJOAWBiEjEKQhERCJOQSAiEnEKAhGRiFMQiIhEnIJARCTicsMuoDtGjBjhkyZNCrsMEZE+ZePGjYfcfWRX0/WJIJg0aRJlZWVhlyEi0qeY2Z7uTKdNQyIiEacgEBGJOAWBiEjEKQhERCJOQSAiEnEKAhGRiFMQiIhEXGjnEZjZAuA/gBjwA3f/cli1iITJ3Ym3OfHgubXN8TaIu+PuONB+R1nH4fRwggX/aQve09Z2Zj5t7rTE22hrO/N+D2bR1j5/hzZP1NH+fGb8mefT7z1r+ee63a2TPN9gXknvaZ9v+/CZv8kb55H8t+qonTdM38H83jCPN097ppbEQPvf/HSdvLHfb6gj6f/NG6d5c3tyH04vjzf/HfzMxIwZXMh75xTTk0IJAjOLAQ8DNwLVwAYzK3X3V8KoR7Kfu9MSdxqa45xsbuVkUysnm+Ocan9ubqWhOU5zvI3m1jaagkdiOJ543dJGc7yNeFsbLXGnNd5Ga1vii7Y1nnhuCZ5b25zm1jZa2860Jeog8aWf9MUvci5XFw/JziAAZgMV7l4JYGbLgYWAgkDO6WRTK/uPNXK8sZWjDS3Un2oOntsfzdQ3nHk+1tDCyaY4ja1xzvHDtVN5MaNfboyCvBwKcmPkxYzcWA65OUZeLIfcmJGXk0N+bg5FBbnkxXLIi50Zlx885+YktsLmmJEbM3LMiOVAzIycnPbXRm5O4jnHjByDnBwD2n/1nxkOBjHs9K/sWDDP9nm1P/JiOcTa3xC8N8cMM7BgOUbwHLTnnH4O2jgzbftyk0rq1BuWdXoZwRws0Zecs2o7PUzH7WfP/8xw0vTnMb83zDqos7229r63v+f0K0v+f3Bm2Wf/v3ljfW9u7/S9Xf1h0yysIBgPVCW9rgbmJE9gZkuAJQDFxT2bhpIZ3J1DJ5rZffgkuw+dpKruFLUnmqg93kT1kQb21jdwrLG10/cPKMhlcGEeQ/onHmMHFzKoMJei/FwK82Pkx3LoX5DLgIIY/fNzKSqIUZSfS1FBYny/vBgFuTnkxXIoyE08evsfpEgYMvZaQ+6+FFgKUFJSovXnLNTUGufF1+v5485a1lXWUb73KI0tbafH5xgMKypgxIB8JgwtZM7kYYwdUsiYQf0YVJj40h9cmM+Q/nkMLswjL6ZjH0QuRFhBUANMTHo9IWiTLBVvc57fdpCdB09QcfAEuw+fZGvwxZ+bY1wxfjB3zC6meFh/Jo0oYtLwIsYPKSQ/V1/uIj0trCDYAEwzs8kkAmAR8N6QapEe0tzaxnPl+1hbWcdvXj3AweNNAIweVMBFw4q4Y3Yx104ZztypwxnULy/kakWiK5QgcPdWM7sXWEXi8NFl7r41jFokvdydrXuP8ZNN1fx0Uw1HG1oY2C+XuVOGc/usCbz94uEM1Je+SEYJbR+Bu68EVoa1fEmv1ngbv3xpH9/87U4qa0+SH8vhxhmjWXTNRN4+dcTpo19EJPNk7M5i6RtONbfyX+te54d/2k31kQYuGzOQL91+JfNnjGFYUX7Y5YlINygI5II0NMdZ+odKfvDHSo43tTJ70jAevOVy5s8Yo1//In2MgkDO2++2H+Qzz5ZTVdfATdNH87HrplBy0VAdcy/SRykIpNsaW+J8vnQryzdUMWVkEU9+7FrmTh0edlkikiIFgXTLlqp6/uGpzVQeOsnd10/lU/OmUZAbC7ssEUkDBYGck7uz8uX9/NPTWxhWlM+PPjKbd04bGXZZIpJGCgLplLvzpee2sfQPlbxlwmB+sPgaRg4sCLssEUkzBYF0qLElzn3PvETplr28b04xn/3L6doUJJKlFATyJqeaW/nID8tYu+sw/3TTJXzi+ot1SKhIFlMQyBucaGrlw/+5gbI9dXzj765i4VXjwy5JRHqYgkBOO97Ywgf/cwObq+r51h2z+IuZY8MuSUR6gYJAgMRloj/+o41sqarn4fdezYIrFAIiUaEgEAC+vnoHf3rtMF/5m5kKAZGI0V0/hOe3H+Tbz1fwnpIJvKdkYtdvEJGsoiCIuOojp/jnp7dw2ZiBfGHhFWGXIyIhUBBEWGu8jbt/vImm1ja+dcfV9MvTeQIiUaR9BBH2/T/u4uWao3znfbOYNnpg2OWISEi0RhBRFQdP8PU1O5g/YzS3XKmdwyJRllIQmNnfmtlWM2szs5Kzxj1gZhVmtt3M5ie1LwjaKszs/lSWLxcm3ub88zNb6J8f44t/pf0CIlGX6hpBOXA78IfkRjObDiwCZgALgO+YWczMYsDDwM3AdOCOYFrpRSvKqnjx9Xo+e+t0Rg3sF3Y5IhKylPYRuPurQEd3ploILHf3JmCXmVUAs4NxFe5eGbxveTDtK6nUId139FQLX121ndmThvHuq3X5CBHpuX0E44GqpNfVQVtn7dJL/v3X26k/1cznb5uhW0uKCNCNNQIzWwOM6WDUg+7+bPpLOr3cJcASgOLi4p5aTKRsqarnR2v38IFrL2L6uEFhlyMiGaLLIHD3eRcw3xog+RTVCUEb52g/e7lLgaUAJSUlfgE1yFm+sWYHQ/vncd+CS8MuRUQySE9tGioFFplZgZlNBqYB64ENwDQzm2xm+SR2KJf2UA2SZHNVPc9vr+Vj101hYL+8sMsRkQyS0s5iM3s38C1gJPDfZrbZ3ee7+1YzW0FiJ3ArcI+7x4P33AusAmLAMnffmlIPpFu+/dsKhvTP4865k8IuRUQyTKpHDf0M+Fkn4x4CHuqgfSWwMpXlyvnZc/gkv9l2gE/ecDEDCnQyuYi8kc4sjoAf/mk3uTnG+6+9KOxSRCQDKQiy3PHGFp4uq+bWmeMYNUgnj4nImykIstyKsurEfYjfPjnsUkQkQykIspi78+T615lVPIQrJwwOuxwRyVAKgiz26r7jVBw8wbtnTQi7FBHJYAqCLFa6ZS+xHOOWKzo6MVxEJEFBkKXcnV9s2cs7Lh7B8AEFYZcjIhlMQZClNr1eT019A7e9ZVzYpYhIhlMQZKlfbNlLQW4ON80YHXYpIpLhFARZqDXexi9f2se7Lhul6wqJSJcUBFlobWUdh040abOQiHSLgiALlW6pYUBBLjdcNirsUkSkD1AQZJnGlji/Kt/PTTNG0y8vFnY5ItIHKAiySPWRU7zney9wrLGVv33rxK7fICJCipehlszxPzsPce+Tm4jHnaUfeCtzpw4PuyQR6SMUBH2cu/Po/+zi/658lYtHDeB7Hyhh8oiisMsSkT5EQdCHNbbE+ZefvcxPN9Uwf8Zovvaeq3TjGRE5b/rW6KPqTjbzscfL2LjnCP8w7xI++a6LycmxsMsSkT5IQdAHVdae4M5l6zl4vImH3zuLv5g5NuySRKQPS+moITP7qpltM7OXzOxnZjYkadwDZlZhZtvNbH5S+4KgrcLM7k9l+VFUXnOUv33kBRqa4zy15FqFgIikLNXDR1cDV7j7TGAH8ACAmU0HFgEzgAXAd8wsZmYx4GHgZmA6cEcwrXRD2e467vj+Wgpyc3j6rrlcXTw07JJEJAukFATu/mt3bw1ergXa74CyEFju7k3uvguoAGYHjwp3r3T3ZmB5MK104fc7ann/o+sYOaCAp+9+G1NGDgi7JBHJEuk8oezDwHPB8HigKmlcddDWWbucw8qX9/HRxzYwZcQAVtw1l/FDCsMuSUSySJc7i81sDdDRLa4edPdng2keBFqBJ9JVmJktAZYAFBcXp2u2fc6KDVXc/9OXmFU8lEc/eA2DC3U1URFJry6DwN3nnWu8mX0QuBX4c3f3oLkGSL7GwYSgjXO0n73cpcBSgJKSEu9ommz3o7V7+MzPy7nukpF87/1vpTBf1w4SkfRL9aihBcB9wG3ufippVCmwyMwKzGwyMA1YD2wAppnZZDPLJ7FDuTSVGrLVs5tr+Oyz5cy7fBQ/uLNEISAiPSbV8wi+DRQAq80MYK273+XuW81sBfAKiU1G97h7HMDM7gVWATFgmbtvTbGGrPP89oN8esUW5kwexrffO4v8XF0bUER6jp3ZmpO5SkpKvKysLOwyesWm14/wvu+vY+qoIp782LW6w5iIXDAz2+juJV1Np5+aGWTb/mN8cNl6Rg0qYNkHr1EIiEivUBBkiKq6U3zg0fUU5sf48UfmMGpgv7BLEpGI0LWGMkD9qWY+8Og6mlvbeOauuUwc1j/skkQkQrRGEDJ359MrtlBT38Cji0uYNnpg2CWJSMQoCEL2wz/t5jfbDvIvt1xOyaRhYZcjIhGkIAjRrkMn+bdfbeP6S0fywbdNCrscEYkoBUFIWuNt/P3yFynIjfHl22cSnIchItLrtLM4JMv+dxcvVR/l4ffOYsxgHSEkIuHRGkEIXq4+ytd+vYN5l4/mlis7up6fiEjvURD0siMnm/no4xsYVpTPl26/UpuERCR02jTUyz5XupUjJ1t4+q65jBxYEHY5IiJaI+hNz28/SOmWvdx9/VTeMnFI128QEekFCoJecryxhbt/vJGpI4v4xA1Twy5HROQ0BUEv+fJz22hsaeMLC6+gIFf3FhCRzKEg6AUvvHaYJ9a9zuK5F/H2i0eEXY6IyBsoCHrYiaZW7vvJFoqH9ef+my8PuxwRkTfRUUM97JHfvUZVXQNPLblWt5sUkYykNYIeVFPfwPf/WMnCq8YxZ8rwsMsREemQgqAHffVX2wC4b8FlIVciItK5lILAzL5oZi+Z2WYz+7WZjQvazcy+aWYVwfhZSe9ZbGY7g8fiVDuQqTbuOcLPN+/lo++czPghhWGXIyLSqVTXCL7q7jPd/Srgl8Bng/abgWnBYwnwXQAzGwZ8DpgDzAY+Z2ZDU6wh47S1OZ8rLWd4UT53/ZnOGRCRzJZSELj7saSXRYAHwwuBxz1hLTDEzMYC84HV7l7n7keA1cCCVGrIRM+V76e85hj333yZbkAvIhkv5aOGzOwh4E7gKHBD0DweqEqarDpo66w9a7TG2/ja6u1cMnoAt8+aEHY5IiJd6nKNwMzWmFl5B4+FAO7+oLtPBJ4A7k1XYWa2xMzKzKystrY2XbPtcb94aS+VtSf5xxsvJZajK4uKSObrco3A3ed1c15PACtJ7AOoASYmjZsQtNUA15/V/rtOlrsUWApQUlLiHU2TadydR35XyaWjBzJ/xuiwyxER6ZZUjxqalvRyIbAtGC4F7gyOHroWOOru+4BVwE1mNjTYSXxT0JYVnt9+kO0HjrPkuim6z4CI9Bmp7iP4spldCrQBe4C7gvaVwC1ABXAK+BCAu9eZ2ReBDcF0X3D3uhRryBiP/L6ScYP7cdtV48IuRUSk21IKAnf/607aHbink3HLgGWpLDcTbXr9COt31fGZW6eTF9N5eiLSd+gbK00e+d1rDC7MY9E1E7ueWEQkgygI0qDi4AlWv3qAxXMvoqhA1/ETkb5FQZAGj7+wm7xYDovfNinsUkREzpuCIEWHTjTxk43V3DpzLMMH6Gb0ItL3KAhS9MP/3c2pljifuF7XFBKRvklBkIITTa08/sJu5l0+motHDQy7HBGRC6IgSMHPNlVzrLGVu7U2ICJ9mILgArk7j72wh5kTBnP1xCFhlyMicsEUBBfohdcOU3HwBHfOnaTLSYhIn6YguECPvbCbof3zuHXm2LBLERFJiYLgAtTUN7D6lQMsml1Mv7xY2OWIiKREQXABHvvTbgDeN6c43EJERNJAQXCejp5q4cdr93DbW8YxYWj/sMsREUmZguA8PfbCbk41x/noO6eEXYqISFooCM7Tc+X7GTOoH1eMHxx2KSIiaaEgOA8HjzXy6r5j2jcgIllFQXAeHn6+AoAbLhsVciUiIumjIOgmd+e/X94HwPSxg0KuRkQkfRQE3fRyzVEOnWjm3/76SnJydCaxiGSPtASBmX3azNzMRgSvzcy+aWYVZvaSmc1Kmnaxme0MHovTsfze8OzmveTHclgwQ2cSi0h2Sfm+imY2EbgJeD2p+WZgWvCYA3wXmGNmw4DPASWAAxvNrNTdj6RaR09yd1a+vI/rLhnB4P55YZcjIpJW6Vgj+DpwH4kv9nYLgcc9YS0wxMzGAvOB1e5eF3z5rwYWpKGGHvXqvuPsO9rITdPHhF2KiEjapRQEZrYQqHH3LWeNGg9UJb2uDto6a89ov912AIDrLx0ZciUiIunX5aYhM1sDdPRT+EHgX0hsFko7M1sCLAEoLg73uP1fbd3PrOIhjBrUL9Q6RER6QpdrBO4+z92vOPsBVAKTgS1mthuYAGwyszFADTAxaTYTgrbO2jta7lJ3L3H3kpEjw/slXlV3ivKaYyy4QpuFRCQ7XfCmIXd/2d1Hufskd59EYjPPLHffD5QCdwZHD10LHHX3fcAq4CYzG2pmQ0msTaxKvRs9Z9XW/QDMn6EgEJHslPJRQ51YCdwCVACngA8BuHudmX0R2BBM9wV3r+uhGtJi1db9XD52EBcNLwq7FBGRHpG2IAjWCtqHHbink+mWAcvStdyeVHu8ibI9R/jUn18SdikiIj1GZxafw4bddbjDdZeMCLsUEZEeoyA4hz+9doj8WA7Tx+naQiKSvRQE57Bh1xGuKh5CQa7uSywi2UtB0Ildh06y/cBxbtZhoyKS5RQEndi0J3H5o3dcrP0DIpLdFASdeKHyMIV5MaaMHBB2KSIiPUpB0IHjjS08s7Gam68cQ0z3HhCRLKcg6MBz5Ymzif9y5riQKxER6XkKgg6s31XHsKJ8XW1URCJBQdCBTXuOcNXEIZhps5CIZD8FwVm27j1K5aGT3KC1ARGJCAXBWX7+Yg35sRxue0vG3y9HRCQtFARnWVtZx1svGqp7E4tIZCgIkjS3trF9/3FmThwcdikiIr1GQZBkx4HjNMfbmDFOQSAi0aEgSLJuV+IeOW+9aGjIlYiI9B4FQZJNe44wbnA/xg8pDLsUEZFeoyAINLXG+cPOWq7W2oCIRIyCILBpTz3HG1u58fLRYZciItKrUgoCM/u8mdWY2ebgcUvSuAfMrMLMtpvZ/KT2BUFbhZndn8ry02ndrsPkGNxw6aiwSxER6VXpuHn9193935MbzGw6sAiYAYwD1phZ+x3gHwZuBKqBDWZW6u6vpKGOlLz4ej1TRw7Q+QMiEjk9tWloIbDc3ZvcfRdQAcwOHhXuXunuzcDyYNpQxducTXuOMHvysLBLERHpdekIgnvN7CUzW2Zm7XtaxwNVSdNUB22dtb+JmS0xszIzK6utrU1DmZ17ueYox5tamTNleI8uR0QkE3UZBGa2xszKO3gsBL4LTAWuAvYBX0tXYe6+1N1L3L1k5MievQDclqp6AEp0xJCIRFCX+wjcfV53ZmRm3wd+GbysASYmjZ4QtHGO9tDsOHCcQf1yGTu4X9iliIj0ulSPGhqb9PLdQHkwXAosMrMCM5sMTAPWAxuAaWY22czySexQLk2lhnTYeeAEF48aoPsPiEgkpXrU0FfM7CrAgd3AxwHcfauZrQBeAVqBe9w9DmBm9wKrgBiwzN23plhDymrqG7hmkjYLiUg0pRQE7v6Bc4x7CHiog/aVwMpUlptOR042U1PfwF8PnxB2KSIioYj8mcWv7j8GwNUTh4RciYhIOCIfBDsPnABg+rhBIVciIhKOyAfBjgPHGVyYx6iBBWGXIiISisgHwRPrXmfckEIdMSQikRXpIKg72QzApOH9Q65ERCQ8kQ6CP+xIXLrirj+bGnIlIiLhiXQQrNt1mMGFeVw5XvcoFpHoinQQVB9pYNLw/uTkaP+AiERXpIOgpr6Bcbo/sYhEXGSDIN7mVNc1UKwdxSIScZENgr31DTTH25g8vCjsUkREQhXZINh16CQAk0YoCEQk2iIbBLsPJ4JgsoJARCIuskGw69BJ+ufHdGkJEYm8SAfBpOFFurSEiEReZINg+/7jXDJ6QNhliIiELpJBcLKplX1HG7lkzMCwSxERCV0kg2BvfQMA4wbrZDIRkUgGwY7gZjQXj9KmIRGRlIPAzD5pZtvMbKuZfSWp/QEzqzCz7WY2P6l9QdBWYWb3p7r8C/FabSIIpo5UEIiIpHTzejO7AVgIvMXdm8xsVNA+HVgEzADGAWvM7JLgbQ8DNwLVwAYzK3X3V1Kp43y9VnuC8UMKKcyP9eZiRUQyUkpBANwNfNndmwDc/WDQvhBYHrTvMrMKYHYwrsLdKwHMbHkwba8HwVRtFhIRAVLfNHQJ8E4zW2dmvzeza4L28UBV0nTVQVtn7W9iZkvMrMzMympra1Ms8wx3p7zmGFN0RrGICNCNNQIzWwOM6WDUg8H7hwHXAtcAK8xsSjoKc/elwFKAkpIST8c8AV7ddxyA8br8tIgI0I0gcPd5nY0zs7uBn7q7A+vNrA0YAdQAE5MmnRC0cY72XrG5qh6AGy4b1ZuLFRHJWKluGvo5cANAsDM4HzgElAKLzKzAzCYD04D1wAZgmplNNrN8EjuUS1Os4bzU1J8iN8d0sTkRkUCqO4uXAcvMrBxoBhYHawdbzWwFiZ3ArcA97h4HMLN7gVVADFjm7ltTrOG87DvayKiBBcR0e0oRESDFIHD3ZuD9nYx7CHiog/aVwMpUlpuK/UcbGTO4X1iLFxHJOJE7s3j/sUbG6tISIiKnRSoI3F1rBCIiZ4lUEBxrbOVUc5yxCgIRkdMiFQQ1RxJXHdUagYjIGZEKgvKaowBcNmZQyJWIiGSOSAXBi1VHGNQvV5eXEBFJEpkgeGLdHp5cX0Xx8P7k6BwCEZHTsj4IVm3dT+3xJr6xZicArx8+FXJFIiKZJdUzizNaY0ucj/9oI5eNGchVE4ew+pUD/POCy8IuS0Qko2T1GkFLvA2APYdPMXJgAf3zY7x/TnHIVYmIZJYsD4IzV69ubm1jaP98zLR/QEQkWVYHQUFuonsNLXGaW9vIiykERETOltVBUFRwZhfIoRNN5OdmdXdFRC5IZL4ZTza1KghERDqQ1UcNJdtSfTTsEkREMpJ+IouIRFykguCO2RO7nkhEJGIiFQSfvunSsEsQEck4kQqCfnmxsEsQEck4KQWBmT1lZpuDx24z25w07gEzqzCz7WY2P6l9QdBWYWb3p7L889VPRw2JiLxJqjev/7v2YTP7GnA0GJ4OLAJmAOOANWZ2STDpw8CNQDWwwcxK3f2VVOrortyYgkBE5GxpOXzUEtdteA/wrqBpIbDc3ZuAXWZWAcwOxlW4e2XwvuXBtL0SBCIi8mbp+on8TuCAu+8MXo8HqpLGVwdtnbW/iZktMbMyMyurra1NU5kiInK2LoPAzNaYWXkHj4VJk90BPJnOwtx9qbuXuHvJyJEjL3g+K//POym5aCiPf3h21xOLiERQl5uG3H3eucabWS5wO/DWpOYaIPmg/QlBG+do7xHTxw3imbvf1pOLEBHp09KxaWgesM3dq5PaSoFFZlZgZpOBacB6YAMwzcwmm1k+iR3KpWmoQURELlA6dhYv4qzNQu6+1cxWkNgJ3Arc4+5xADO7F1gFxIBl7r41DTWIiMgFMnfveqqQlZSUeFlZWdhliIj0KWa20d1LuppOB9aLiEScgkBEJOIUBCIiEacgEBGJOAWBiEjE9YmjhsysFtiTwixGAIfSVE5fEbU+R62/oD5HRSp9vsjdu7w0Q58IglSZWVl3DqHKJlHrc9T6C+pzVPRGn7VpSEQk4hQEIiIRF5UgWBp2ASGIWp+j1l9Qn6Oix/sciX0EIiLSuaisEYiISCeyOgjMbIGZbTezCjO7P+x6UmFmy8zsoJmVJ7UNM7PVZrYzeB4atJuZfTPo90tmNivpPYuD6Xea2eIw+tJdZjbRzJ43s1fMbKuZ/X3QnrX9NrN+ZrbezLYEff7XoH2yma0L+vZUcBl3gku9PxW0rzOzSUnzeiBo325m88PpUfeYWczMXjSzXwavs72/u83sZTPbbGZlQVt4n2t3z8oHictcvwZMAfKBLcD0sOtKoT/XAbOA8qS2rwD3B8P3A/8WDN8CPAcYcC2wLmgfBlQGz0OD4aFh9+0cfR4LzAqGBwI7gOnZ3O+g9gHBcB6wLujLCmBR0P4IcHcw/AngkWB4EfBUMDw9+MwXAJODfwuxsPt3jn7/I/BfwC+D19ne393AiLPaQvtcZ/MawWygwt0r3b0ZWA4s7OI9Gcvd/wDUndW8EHgsGH4M+Kuk9sc9YS0wxMzGAvOB1e5e5+5HgNXAgp6v/sK4+z533xQMHwdeJXGP66ztd1D7ieBlXvBw4F3AM0H72X1u/1s8A/y5mVnQvtzdm9x9F1BB4t9ExjGzCcBfAD8IXhtZ3N9zCO1znc1BMB6oSnpdHbRlk9Huvi8Y3g+MDoY763uf/ZsEmwCuJvELOav7HWwm2QwcJPGP+zWg3t1bg0mS6z/dt2D8UWA4favP3wDuA9qC18PJ7v5CItx/bWYbzWxJ0Bba5zoddyiTDODubmZZeQiYmQ0AfgJ8yt2PJX4AJmRjvz1xN7+rzGwI8DPgspBL6jFmditw0N03mtn1YdfTi97h7jVmNgpYbWbbkkf29uc6m9cIaoCJSa8nBG3Z5ECwikjwfDBo76zvfe5vYmZ5JELgCXf/adCc9f0GcPd64HlgLonNAe0/3JLrP923YPxg4DB9p89vB24zs90kNt++C/gPsre/ALh7TfB8kETYzybEz3U2B8EGYFpw9EE+iR1LpSHXlG6lQPuRAouBZ5Pa7wyONrgWOBqscq4CbjKzocERCTcFbRkp2Pb7KPCqu/+/pFFZ228zGxmsCWBmhcCNJPaNPA/8TTDZ2X1u/1v8DfBbT+xJLAUWBUfZTAamAet7pxfd5+5b6xnJAAAA50lEQVQPuPsEd59E4t/ob939fWRpfwHMrMjMBrYPk/g8lhPm5zrsvec9+SCxt30HiW2sD4ZdT4p9eRLYB7SQ2Bb4ERLbRn8D7ATWAMOCaQ14OOj3y0BJ0nw+TGJHWgXwobD71UWf30FiW+pLwObgcUs29xuYCbwY9Lkc+GzQPoXEF1sF8DRQELT3C15XBOOnJM3rweBvsR24Oey+daPv13PmqKGs7W/Qty3BY2v7d1OYn2udWSwiEnHZvGlIRES6QUEgIhJxCgIRkYhTEIiIRJyCQEQk4hQEIiIRpyAQEYk4BYGISMT9f7l+6e50lY+WAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.plot(reward_log)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Good other blog posts.\n",
    "\n",
    "- https://www.freecodecamp.org/news/an-introduction-to-q-learning-reinforcement-learning-14ac0b4493cc/\n",
    "    - good diagrams\n",
    "- https://towardsdatascience.com/simple-reinforcement-learning-q-learning-fcddc4b6fe56\n",
    "    - simple and straight forward, it's nice to have code snippets to explain parts, perhaps it should be text, code, equation rather than text, equation, diagram. Perhaps look at which is best for the situtation"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
